This exercise performs a back-testing for a MA strategy. First, I find the optimal MA parameters (number of look-back days for the two moving averages) by maximising trading profits over the first part of the sample (in-sample backtesting). Then, I use these optimal parameters to trade in the second part of the sample (out-of-sample backtesting).
In-sample back-testing results can never be achieved in live trading. Out-of-sample results can be achieved in theory, gross of trading costs and slippage.
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import pandas_datareader as web
import datetime as dt
import plotly.io as pio
pio.renderers.default = 'jupyterlab+svg'
import scipy.stats
# Download daily CAD/USD quotes from Yahoo Finance, from 31-Aug-2010 onward.
# NOTE(review): pandas_datareader's 'yahoo' source is deprecated/broken in
# recent releases — confirm it still responds, or switch to yfinance.
CAD=web.DataReader('CADUSD=X', 'yahoo', start = '08/31/2010')
#sp500.head()
CAD.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 2643 entries, 2010-08-30 to 2020-10-22 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 High 2643 non-null float64 1 Low 2643 non-null float64 2 Open 2643 non-null float64 3 Close 2643 non-null float64 4 Volume 2643 non-null float64 5 Adj Close 2643 non-null float64 dtypes: float64(6) memory usage: 144.5 KB
# Plot the closing exchange rate with Plotly graph objects.
close_trace = go.Scatter(x=CAD.index, y=CAD.Close, name='CAD/USD')
fig1 = go.Figure(data=[close_trace])
fig1.update_layout(title="CAD/USD Exchange Rate", xaxis_type="date", yaxis_type="log")
fig1.show()
# Same series drawn through the Plotly Express shorthand API.
import plotly.express as px

fig2 = px.line(x=CAD.index, y=CAD.Close, title='CADUSD Exchange Rate')
fig2.show()
#We will also compare the returns to those of a Long-only exposure to Canadian dollar.
price = np.array(CAD['Close'])  # closing prices as a plain numpy array
LongOnlyReturn = np.log(CAD['Close']/(CAD['Close']).shift(1))  # daily log returns; first entry is NaN
LongOnly = np.exp(np.cumsum(LongOnlyReturn)) #Cumulative return of Long-Only Strategy
#This is the first definition of the MA function, from textbooks. It applies to the whole time-series. I think I shall use it to compare fully in-sample with out-of sample returns.
#It calculates the exposures over the whole period for given MA parameters i and j. Then it calculates the returns arising from these exposures, and evaluates a maximization criterion, total return divided by standard deviation.
# (In a future version I'll replace standard deviation by asymmetric deviation.)
def MAstrat0(i, j, SD=0.00):
    """Backtest the MA crossover strategy on the full (global) CAD series.

    i, j -- look-back lengths (days) of the fast and slow moving averages.
    SD   -- minimum |MA1 - MA2| gap required before taking a position.

    Returns (criterion, daily returns, cumulative curve, MA1, MA2, positions).
    """
    fast = np.array(CAD['Close'].rolling(i).mean())
    slow = np.array(CAD['Close'].rolling(j).mean())
    # Warm-up days have NaN moving averages -> treat as no signal (0).
    signal = np.nan_to_num(fast - slow)
    signal[np.abs(signal) < SD] = 0  # ignore gaps below the trading threshold
    position = np.sign(signal)       # +1 long, -1 short, 0 flat
    # Yesterday's position earns today's return (no look-ahead).
    strat_ret = position[:-1] * np.array(LongOnlyReturn[1:])
    equity = np.exp(np.cumsum(strat_ret))
    # Selection criterion: total return scaled by return volatility.
    criterion = np.sum(strat_ret) / np.std(strat_ret)
    return criterion, strat_ret, equity, fast, slow, position
# Baseline check: criterion for a 7/20-day crossover on the full sample.
MAcrit0 = MAstrat0(7,20,0.00)[0]; print(MAcrit0)
17.380268857510238
N = 1000 #length of in-sample test window
ISprice = np.array(CAD['Close'][:N]) #from 0 to N-1, in-sample prices
#retN = LongOnlyReturn[:N-1]
#The second definition, below, calculates exposures, returns, and the maximization criterion on a subsample of prices, from inception to N-1.
def MAstrat1(ISprice, i, j, SD=0.00):
    """Backtest the MA crossover strategy on an arbitrary price array.

    ISprice -- 1-D array of closing prices (any sub-sample of the series).
    i, j    -- look-back lengths (days) of the two moving averages.
    SD      -- minimum |MA1 - MA2| gap required before taking a position.

    Returns (criterion, daily returns, cumulative curve, MA1, MA2, positions).
    """
    ISprice = np.asarray(ISprice, dtype=float)
    # Moving averages via convolution (faster than pandas rolling means).
    MA1 = np.convolve(ISprice, np.ones(i), 'valid')/i
    MA2 = np.convolve(ISprice, np.ones(j), 'valid')/j
    # Align the two MA arrays on their common (most recent) length.
    realSize = min(MA1.shape[0], MA2.shape[0])
    buy = MA1[(MA1.shape[0]-realSize):]-MA2[(MA2.shape[0]-realSize):]
    buy[np.abs(buy) < SD] = 0  # we can introduce a minimum threshold for trading
    Long = np.sign(buy)        # +1 long, -1 short, 0 flat
    offset = max(i, j)
    # BUG FIX: the original sliced the *global* LongOnlyReturn series, which is
    # aligned only when ISprice starts at the very beginning of the sample.
    # Compute log returns locally from the prices actually passed in, so the
    # function is also correct for out-of-sample windows.
    ret = np.log(ISprice[1:]/ISprice[:-1])
    # Yesterday's position earns today's return (no look-ahead).
    MAreturn = Long[:-1]*ret[offset-1:]
    MAstrategy = np.exp(np.cumsum(MAreturn))
    # Maximization criterion: total return scaled by standard deviation.
    MAcrit = np.sum(MAreturn)/np.std(MAreturn)
    return MAcrit, MAreturn, MAstrategy, MA1, MA2, Long
# Same criterion, computed by the subsample version on the first N prices.
MAcrit1 = MAstrat1(ISprice,7,20)[0]; print(MAcrit1)
-27.18040919303611
The two methods implement the same optimization logic; the values printed above differ because the first was evaluated on the full series and the second on the first N prices only. The second method should be faster, because it gets rid of the rolling mean calculation with Pandas.
# Grid search over MA look-backs (i < j) on the in-sample window; the best
# pair (imax, jmax) found here is applied to out-of-sample testing below.
#These parameters will be applied to out-of-sample testing
MAmaxcrit = -np.Inf
n=250 #maximum values for i and j
MACritij = np.zeros((n-1,n-1))  # criterion surface, indexed by (i-1, j-1)
for i in range(1, n):
    for j in range(i+1, n):  # j > i: the slow MA is longer than the fast MA
        MAcrit = MAstrat1(ISprice, i,j, 0.00)[0]
        MACritij[i-1,j-1] = MAcrit
        if MAcrit>MAmaxcrit:  # keep track of the best pair seen so far
            MAmaxcrit = MAcrit
            imax = i
            jmax = j
print(f'i={imax}, j={jmax}, Maximization criterion = {MAmaxcrit}')
i=37, j=51, Maximization criterion = 51.6431440827211
# 3-D surface of the selection criterion over the (i, j) grid, in-sample window.
z = MACritij
fig = go.Figure(data=[go.Surface(z=z)])
fig.update_layout(
    title='MA Parameters Heatmap for Initial In-sample Window',
    scene_camera_eye=dict(x=1, y=-1.5, z=0.25),
    autosize=False,
    width=600,
    height=600,
    margin=dict(l=65, r=50, b=65, t=90),
)
fig.show()
#Pure in-sample testing applies the optimization criterion to the whole data series.
# NOTE: this reuses (overwrites) MAmaxcrit, n and MACritij from the previous search.
MAmaxcrit = -np.Inf
n=250 #maximum values for i and j
MACritij = np.zeros((n-1,n-1))
for i in range(1, n):
    for j in range(i+1, n):
        MAcrit = MAstrat0(i,j, 0.00)[0]  # full-series version of the strategy
        MACritij[i-1,j-1] = MAcrit #(because the number of days lookback is matrix index+1)
        if MAcrit>MAmaxcrit:
            MAmaxcrit = MAcrit
            ISimax = i
            ISjmax = j
print(f'i={ISimax}, j={ISjmax}, Maximization criterion = {MAmaxcrit}')
i=36, j=49, Maximization criterion = 101.39705903150535
# Surface of the criterion over the full-series grid search.
z = MACritij
#sh_0, sh_1 = z.shape
# Axes normalized to 0..1; the actual look-back in days is matrix index + 1.
x, y = np.linspace(0, 1, n-1), np.linspace(0, 1, n-1)
fig = go.Figure(data=[go.Surface(z=z, x=x, y=y)])
fig.update_layout(title='MA Parameters Heatmap for the full series',
                  scene_camera_eye=dict(x=1, y=-1.5, z=0.25),
                  autosize=False,
                  width=500, height=500,
                  margin=dict(l=65, r=50, b=65, t=90))
fig.show()
It appears the optimal parameters are found in a small island above water, which makes me pessimistic about the success of the out-of-sample backtesting.
# Apply the in-sample-optimal parameters (imax, jmax) to several windows.
i = imax
j = jmax
#Out-of-sample only:
OSprice = np.array(CAD['Close'][N:]) #from N to the end, out-of-sample prices
# NOTE(review): MAstrat1 slices the global LongOnlyReturn from the start of the
# full series, so calling it on OSprice pairs positions with *in-sample*
# returns — the out-of-sample results below look misaligned; verify.
OSMAcrit, OSMAreturn, OSMAstrategy, OSMA1, OSMA2, OSLong = MAstrat1(OSprice, i,j, 0.00)
#in-sample only for the first N days
ISprice = np.array(CAD['Close'][:N])
ISMAcrit, ISMAreturn, ISMAstrategy, ISMA1, ISMA2, ISLong = MAstrat1(ISprice, i,j, 0.00)
#print(ISMAcrit)
#Combined in-sample with out-of-sample:
MAcrit, MAreturn, MAstrategy, MA1, MA2, Long = MAstrat1(price, i,j, 0.00)
print(MAcrit)
#Fully in-sample over the whole period (parameters optimized on the full series):
IMAcrit, IMAreturn, IMAstrategy, IMA1, IMA2, ILong = MAstrat0(ISimax,ISjmax, 0.00)
print(IMAcrit)
90.45994573984771 101.39705903150535
# Compare equity curves of the strategies against the long-only benchmark.
# NOTE(review): x=CAD.index has the full-sample length while the strategy
# arrays are shorter (they start after the MA warm-up), so the date alignment
# of these traces is approximate — verify before relying on the chart.
fig = go.Figure()
fig.add_trace(go.Scatter(x=CAD.index, y=LongOnly, name='Long-only'))
fig.add_trace(go.Scatter(x=CAD.index, y=IMAstrategy[ISjmax:], name='MA Strategy - Fully In Sample', line_color='#FF0000'))
fig.add_trace(go.Scatter(x=CAD.index, y=MAstrategy, name='MA Strategy - Out of Sample', line_color='#008000'))
fig.add_trace(go.Scatter(x=CAD.index, y=ISMAstrategy, name='MA Strategy - In Sample', line_color='#FF0000'))
fig.update_layout(title="MA Trading Strategy")
fig.show()
#to redo the function output
# Price and the two moving averages on the right axis; the long/short regime
# (+1 / -1 / 0) on the left axis.
layout = go.Layout(title='MA Combined Trading Strategy', yaxis=dict(), yaxis2=dict(overlaying='y', side='right'))
fig = go.Figure(layout=layout)
fig.add_trace(go.Scatter(x=CAD.index, y=CAD['Close'], name='CAD/USD', yaxis='y2'))
fig.add_trace(go.Scatter(x=CAD.index, y=MA1, name='MA1',yaxis='y2'))
fig.add_trace(go.Scatter(x=CAD.index, y=MA2, name='MA2',yaxis='y2'))
fig.add_trace(go.Scatter(x=CAD.index, y=Long, name='Regime', yaxis='y1'))
fig.show()
The graphs show only small differences between in-sample and out-of sample testing. The statistics below add more precision to the story.
# Summary statistics of the purely in-sample strategy returns.
desc = scipy.stats.describe(IMAreturn)
y = dict(desc._asdict())
# Flatten the (min, max) tuple into separate entries.
y["min"] = y["minmax"][0]
y["max"] = y["minmax"][1]
del y["minmax"]
y["return/risk"] = y["mean"] / np.sqrt(y["variance"])
y
{'nobs': 2642,
'mean': 0.00017973493349219882,
'variance': 2.194038842121927e-05,
'skewness': 0.1030226250126977,
'kurtosis': 1.5294314280595227,
'min': -0.020188250301184856,
'max': 0.02189155609072135,
'return/risk': 0.038371638063726086}
# Summary statistics of the out-of-sample strategy returns.
desc = scipy.stats.describe(OSMAreturn)
y = dict(desc._asdict())
# Flatten the (min, max) tuple into separate entries.
y["min"] = y["minmax"][0]
y["max"] = y["minmax"][1]
del y["minmax"]
y["return/risk"] = y["mean"] / np.sqrt(y["variance"])
y
{'nobs': 1592,
'mean': 0.00012063319213572715,
'variance': 2.453790477915984e-05,
'skewness': 0.12572238369674574,
'kurtosis': 1.180965819495194,
'min': -0.020188250301184856,
'max': 0.02189155609072135,
'return/risk': 0.024352753994514687}
# Return distributions: in-sample (red) vs out-of-sample (green).
fig = go.Figure(
    data=[go.Histogram(x=IMAreturn, nbinsx=50, marker_color='#FF0000')],
    layout={'title': 'Distribution of MA In-Sample Strategy returns'},
)
fig.show()
fig = go.Figure(
    data=[go.Histogram(x=OSMAreturn, nbinsx=50, marker_color='#008000')],
    layout={'title': 'Distribution of MA Out-of-Sample Strategy returns'},
)
fig.show()
def Rolling(data, n):
    """Compute the n-period rolling mean and standard deviation of `data`.

    data -- 1-D array of (daily) returns.
    n    -- window length.

    Returns (RollRet, RollStd), each of length len(data) - n (the final
    window is dropped, matching the original loop's indexing).
    """
    data = np.asarray(data, dtype=float)
    m = len(data) - n
    if m <= 0:  # not enough observations for a single window
        return np.zeros(0), np.zeros(0)
    kernel = np.ones(n) / n
    # Vectorized window means of data and data**2 (the original recomputed a
    # one-element convolution per window inside a Python loop).
    RollRet = np.convolve(data, kernel, 'valid')[:m]
    RollSqRet = np.convolve(data**2, kernel, 'valid')[:m]
    # E[x^2] - E[x]^2 can dip slightly below 0 from floating-point error,
    # which would make sqrt return NaN — clip at 0.
    RollVar = np.maximum(RollSqRet - RollRet**2, 0.0)
    RollStd = np.sqrt(RollVar)
    return RollRet, RollStd
n=252 #(approximately 1 year of trading days)
data = MAreturn  # combined (in- + out-of-sample) strategy returns
# Call Rolling once and unpack — the original called it twice with identical
# arguments, doubling the work for the same result.
RollRet, RollStd = Rolling(data, n)
MeanRet = np.mean(RollRet)*252          # annualized average rolling return
StdDev = np.mean(RollStd)*np.sqrt(252)  # annualized average rolling volatility
print(f"Average 1-year Rolling Return: {MeanRet:.4%} p.a.")
print(f"Average 1-year Standard Deviation: {StdDev:.4%} p.a.")
print(f"Return/Risk Ratio: {MeanRet/StdDev:.2}")
Average 1-year Rolling Return: 3.7432% p.a. Average 1-year Standard Deviation: 7.3153% p.a. Return/Risk Ratio: 0.51
data = ISMAreturn  # in-sample strategy returns
# Single Rolling call instead of two redundant evaluations.
ISRollRet, ISRollStd = Rolling(data, n)
MeanRet = np.mean(ISRollRet)*252          # annualized average rolling return
StdDev = np.mean(ISRollStd)*np.sqrt(252)  # annualized average rolling volatility
print(f"Average in-sample 1-year Rolling Return: {MeanRet:.4%} p.a.")
print(f"Average in-sample 1-year Standard Deviation: {StdDev:.4%} p.a.")
print(f"Return/Risk Ratio: {MeanRet/StdDev:.2}")
Average in-sample 1-year Rolling Return: 5.4865% p.a. Average in-sample 1-year Standard Deviation: 7.3060% p.a. Return/Risk Ratio: 0.75
# Annualized rolling return and volatility, full vs in-sample strategies.
# NOTE(review): x=CAD.index[n:] is longer than the rolling arrays (which are
# built on the shorter MAreturn series), so the date alignment here is
# approximate — verify.
layout = go.Layout(title='Annualized Rolling Performance', xaxis_type="date", yaxis=dict(tickformat=".2%"),
                   legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="right",x=1))
fig2 = go.Figure(layout=layout)
fig2.add_trace(go.Scatter(x=CAD.index[n:], y=RollRet*252, name='Rolling Return', line_color='#008000'))
fig2.add_trace(go.Scatter(x=CAD.index[n:], y=ISRollRet*252, name='In Sample Rolling Return', mode='markers', line_color='#008000'))
fig2.add_trace(go.Scatter(x=CAD.index[n:], y=RollStd*np.sqrt(252), name='Rolling Standard Deviation', line_color='#FF0000'))
fig2.add_trace(go.Scatter(x=CAD.index[n:], y=ISRollStd*np.sqrt(252), name='In Sample Rolling Standard Deviation', mode='markers', line_color='#FF0000'))
#fig2.update_layout(title="CAD/USD", xaxis_type="date", yaxis_type="log")#?
fig2.show()
As expected, the out-of-sample backtested performance does not beat in-sample backtesting. The performance is less bad than I expected, except for a long drawdown of about 17%, lasting approximately five months.
This strategy could be improved by: